!! Copyright (C) 2004,2005,2006,2007,2008,2009,2010,2011,2012  Carlo de Falco
!! Copyright (C) 2011,2012  Marco Restelli
!!
!! This file is part of:
!!   LDGH -- Local Hybridizable Discontinuous Galerkin toolkit
!!
!! LDGH is free software: you can redistribute it and/or modify it
!! under the terms of the GNU General Public License as published by
!! the Free Software Foundation, either version 3 of the License, or
!! (at your option) any later version.
!!
!! LDGH is distributed in the hope that it will be useful, but WITHOUT
!! ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
!! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
!! License for more details.
!!
!! You should have received a copy of the GNU General Public License
!! along with LDGH. If not, see <http://www.gnu.org/licenses/>.
!!
!! author: Carlo de Falco             <carlo.defalco _AT_ gmail.com>
!! author: Marco Restelli             <marco.restelli@gmail.com>
!!
!! this module is a port from previous Octave/C++ implementations distributed
!! with the bim and secs3d OctaveForge packages.

!> Local matrix computations for the Scharfetter-Gummel method
!!
!! \n
!!
!<----------------------------------------------------------------------
module mod_scharfetter_gummel

!-----------------------------------------------------------------------

 use mod_kinds, only: &
   wp

 use mod_master_el, only: &
   t_me

 use mod_linal, only: &
   linsys, linsys_chol

!-----------------------------------------------------------------------

 implicit none

!-----------------------------------------------------------------------

! Module interface

 public :: &
!   mod_scharfetter_gummel_constructor, &
!   mod_scharfetter_gummel_destructor,  &
   mod_scharfetter_gummel_initialized, &
   bim3a_local_laplacian,          &
   bim3a_local_scharfetter_gummel, &
   bim3a_local_reaction,           &
   bim3a_local_rhs,                &
   bim3c_global_flux
 
 private

!-----------------------------------------------------------------------

 ! public members
 logical, protected ::               &
   mod_scharfetter_gummel_initialized = .true.
 ! private members
 character(len=*), parameter :: &
   this_mod_name = 'mod_scharfetter_gummel'

!-----------------------------------------------------------------------

contains

!-----------------------------------------------------------------------
  
 !> Local matrix for the Laplace operator
 pure subroutine bim3a_local_laplacian(shg,areak,mu,lloc)
  !> gradients of the basis functions (d,d+1)
  real(wp), intent(in) :: shg(:,:)
  !> element area and diffusion coefficient
  real(wp), intent(in) :: areak, mu
  !> local matrix (d+1,d+1)
  real(wp), intent(out) :: lloc(:,:)

  integer :: i, j, pk

   pk = size(shg,2) ! number of basis functions: d+1
   do i=1,pk
     do j=1,pk
       lloc(i,j) = mu * areak * dot_product( shg(:,i) , shg(:,j) )
     enddo
   enddo
 end subroutine bim3a_local_laplacian

!-----------------------------------------------------------------------

 !> Local rhs
 pure subroutine bim3a_local_rhs(areak, v_coeff, e_coeff, bloc)
  !> element area and diffusion coefficient
  real(wp), intent(in) :: areak
  !> rhs node and element coefficients
  real(wp), intent(in) :: v_coeff(:), e_coeff
  !> local rhs
  real(wp), intent(out) :: bloc(:)

  real(wp) :: dpr

   dpr = real(size(bloc),wp) ! dimension + 1 (real variable)
   bloc = (areak*e_coeff/dpr)*v_coeff

 end subroutine bim3a_local_rhs

!-----------------------------------------------------------------------

 !> Local mass-matrix
 pure subroutine bim3a_local_reaction (areak, node_coef, el_coef, mloc)
  !> element area and piece-wise constant part of the reaction coefficient
  real(wp), intent(in)  :: areak, el_coef
  !> piece-wise linear part of the reaction coefficient
  real(wp), intent(in)  :: node_coef(:)
  !> local mass matrix
  real(wp), intent(out) :: mloc(:,:)
  
  integer :: inode
  real(wp) :: w 
    
   w =  el_coef * areak / 4.0_wp
   
   do inode = 1, size(mloc, 1)
      mloc(inode,inode)  = node_coef (inode) * w
   enddo
    
 end subroutine bim3a_local_reaction

!-----------------------------------------------------------------------

 !> Local matrix for the Laplace operator with SG stabilization
 !!
 !! We follow here <a
 !! href="http://dx.doi.org/10.1007/s007910050012">[Bank, Coughran,
 !! Cowsar, 1998]</a>. Assuming \f$a,\underline{\beta}=const\f$, we
 !! define for each edge \f${\tt e}\f$
 !! \f{displaymath}{
 !!  \psi_{{\tt e}} = a^{-1} \beta_{{\tt e}} \xi, \qquad
 !!  \xi\in[0\,,\,l_{\tt e}],
 !! \f}
 !! with \f$\beta_{{\tt e}} = \frac{1}{l_{{\tt
 !! e}}}\underline{\beta}\cdot\underline{\tt{e}}
 !! \f$, and the local P&eacute;clet number
 !! \f{displaymath}{
 !!  \mathbb{P}e_{{\tt e}} = \frac{\beta_{{\tt e}}l_{{\tt e}}}{2a}.
 !! \f}
 !! This leads to
 !! \f{displaymath}{
 !!  \hat{a}_{{\tt e}} = a\,\mathcal{B}(2\mathbb{P}e_{{\tt e}}),
 !! \f}
 !! with
 !! \f{displaymath}{
 !!  \mathcal{B}(t) = \frac{t}{e^t - 1}.
 !! \f}
 !! A generalization to arbitrary dimension of the difference
 !! operators defined in the cited reference is then
 !! \f{displaymath}{
 !!  \delta_{{\tt e}}(\psi) = \psi(v_{{\tt e},2}) - \psi(v_{{\tt e},1}),
 !! \f}
 !! where \f$v_{{\tt e},1}\f$ and \f$v_{{\tt e},2}\f$ are the vertexes
 !! defining the (oriented) edge \f${\tt e}\f$. The signed distances
 !! \f$s\f$ are
 !! \f{displaymath}{
 !!  s_{{\tt e}} = -|K|l_{{\tt e}}\nabla\varphi_{v_{{\tt e},1}}
 !!  \cdot\nabla\varphi_{v_{{\tt e},2}}.
 !! \f}
 !! With these settings, the discrete formulation (4.9) of theorem 4.1
 !! of <a
 !! href="http://dx.doi.org/10.1007/s007910050012">[Bank, Coughran,
 !! Cowsar, 1998]</a> becomes
 !! \f{displaymath}{
 !!  \sum_{K\in\mathcal{T}_h} \sum_{{\tt e}\in K}
 !!   - a\,\mathcal{B}(2\mathbb{P}e_{{\tt e}})|K|
 !!   \nabla\varphi_{v_{{\tt e},1}} \cdot\nabla\varphi_{v_{{\tt e},2}}
 !!   \delta_{{\tt e}}(e^{\psi_{{\tt e}}}u_h)
 !!   \delta_{{\tt e}}(\varphi_i) = 0.
 !! \f}
 !! We now observe that
 !! \f{displaymath}{
 !!   \delta_{{\tt e}}(e^{\psi_{{\tt e}}}u_h) = e^{2\mathbb{P}e_{{\tt
 !!   e}}} u_{v_{{\tt e},2}} -  u_{v_{{\tt e},1}}, \qquad
 !!   \delta_{{\tt e}}(\varphi_i) = \delta_{i v_{{\tt e},2}} -
 !!   \delta_{i v_{{\tt e},1}},
 !! \f}
 !! define
 !! \f{displaymath}{
 !!   \mathbb{P}e_{{\tt e}_{ij}} = sign( (v_j - v_i) \cdot {\tt e} )
 !!   \mathbb{P}e_{{\tt e}},
 !! \f}
 !! and use the fact that
 !! \f{displaymath}{
 !!   e^{t}\mathcal{B}(t) = \mathcal{B}(-t)
 !! \f}
 !! obtaining
 !! \f{displaymath}{
 !!  \sum_{K\in\mathcal{T}_h} \sum_{j\neq i}
 !!    a\,\mathcal{B}(2\mathbb{P}e_{{\tt e}_{ij}})|K|
 !!   \nabla\varphi_{j} \cdot\nabla\varphi_{i}
 !!   \left( e^{2\mathbb{P}e_{{\tt e}_{ij}}}u_j - u_i \right) = 0.
 !! \f}
 !! This last equation provides the local matrix \f$A^{SG,K}\f$. In
 !! fact, letting
 !! \f{displaymath}{
 !!  A^K_{ij} = a|K| \nabla\varphi_{j} \cdot\nabla\varphi_{i},
 !! \f}
 !! we have
 !! \f{displaymath}{
 !!  A^{SG,K}_{ij} = \left\{
 !!   \begin{array}{cl}
 !!    \displaystyle
 !!    - \sum_{k\neq i}\mathcal{B}(2\mathbb{P}e_{{\tt e}_{ik}})
 !!    A^K_{ik}, & j=i  \\
 !!    \mathcal{B}(-2\mathbb{P}e_{{\tt e}_{ij}}) A^K_{ij}, & j\neq i.
 !!   \end{array}
 !!  \right.
 !! \f}
 !! <em>Notice that \f$A^{SG,K}\f$ is not symmetric.</em>
 !!
 !! \note This subroutine includes a stable evaluation of the Bernoulli
 !! function in terms of
 !! \f{displaymath}{
 !!  \mathcal{B}^+(t) = \mathcal{B}(t) , \qquad
 !!  \mathcal{B}^-(t) = \mathcal{B}(-t),
 !! \f}
 !! with
 !! \f{displaymath}{
 !!  \mathcal{B}^-(t) - \mathcal{B}^+(t) = t, \qquad
 !!  \mathcal{B}^+, \mathcal{B}^- > 0.
 !! \f}
 !!
 !! \note We assume here that the velocity is prescribed in terms of
 !! the nodal potentials \f$\psi_i\f$, so that
 !! \f{displaymath}{
 !!  2\mathbb{P}e_{{\tt e}_{ij}} = \frac{\beta_{{\tt e}_{ij}} l_{{\tt
 !!  e}_{ij} }}{a} = \psi_j - \psi_i.
 !! \f}
 pure subroutine bim3a_local_scharfetter_gummel( me,shg, areak,mu, &
                                                 vloc,sloc )
  !> master element
  type(t_me), intent(in) :: me
  !> gradients of the basis functions (d,d+1)
  real(wp), intent(in)  :: shg(:,:)
  !> element area and diffusion coefficient
  real(wp), intent(in)  :: areak, mu
  !> nodal velocity potential
  real(wp), intent(in)  :: vloc(:)
  !> stabilized local matrix
  real(wp), intent(out) :: sloc(:,:)

  ! side associated quantities
  integer :: i, j, k
  real(wp) :: lloc(size(shg,2),size(shg,2))
  real(wp), dimension(me%children(1)%ns) :: pe2, bp, bm

   ! compute the usual Laplace operator
   call bim3a_local_laplacian(shg,areak,mu,lloc)
   
   ! Compute the Bernoulli functions associated with the edges. The
   ! edges are always defined as the 1-dimensional children simplexes
   ! of the element. For further details, see mod_master_el. Notice
   ! that presently the 1-dimensional childrens are not defined for
   ! 1-dimensional elements (they would coincide with the element),
   ! which implies that this function can not be used for
   ! 1-dimensional problems.
   pe2 = vloc(me%children(1)%s(2,:)) - vloc(me%children(1)%s(1,:))
   call bimu_bernoulli( pe2 , bp , bm )

   ! Now for each edge compute the four contributions to a(i,i),
   ! a(i,j), a(j,i) and a(j,j), being i,j the vertexes of the edge.
   sloc = 0.0_wp
   do k=1,me%children(1)%ns
     ! vertexes of the k-th edge
     i = me%children(1)%s(1,k)
     j = me%children(1)%s(2,k)
     ! diagonal terms: obtained from column-sum equal to zero
     !sloc(i,i) = sloc(i,i) - bm(k)*lloc(i,i)
     !sloc(j,j) = sloc(j,j) - bp(k)*lloc(j,j)
     ! off-diagonal terms
     sloc(i,j) = bp(k)*lloc(i,j)
     sloc(j,i) = bm(k)*lloc(j,i)
   enddo
   do i=1,size(sloc,1)
     ! exploit that so far   sloc(i,i) = 0
     sloc(i,i) = -sum(sloc(:,i))
   enddo

 contains

  elemental subroutine bimu_bernoulli(x,bp,bn)
   real(wp), intent(in)  :: x 
   real(wp), intent(out) :: bp, bn 

   real(wp), parameter :: &
     xlim = 1.0e-2_wp,               &
     tol  = 10.0_wp*epsilon(1.0_wp)
   real(wp) :: ax 
   real(wp) :: jj, fp, fn, df, segno

    ax  = abs(x)
    bp  = 0.0_wp
    bn  = 0.0_wp

    ! x = 0
    x_zero: if(x.eq.0.0_wp) then       
      bp = 1.0_wp
      bn = 1.0_wp
      return
    endif x_zero

    ! asymptotics
    asympt: if(ax.gt.80.0_wp) then
      if(x.gt.0.0_wp) then          
        bp = 0.0_wp
        bn = x
      else          
        bp = -x
        bn = 0.0_wp
      endif
      return
    endif asympt

    ! intermediate values
    int_vals: if((ax.le.80.0_wp).and.(ax.gt.xlim)) then
      bp = x / (exp(x) - 1.0_wp)
      bn = x + bp
      return
    endif int_vals

    ! small values
    small_vals: if((ax.le.xlim).and.(ax.ne.0.0_wp)) then
      jj = 1.0_wp
      fp = 1.0_wp
      fn = 1.0_wp
      df = 1.0_wp
      segno = 1.0_wp
      do
       if(abs(df).le.tol) exit
        jj = jj + 1.0_wp
        segno = -segno
        df = df * x / jj
        fp = fp + df
        fn = fn + segno * df
      enddo
      bp = 1.0_wp / fp;
      bn = 1.0_wp / fn;
      return
    endif small_vals

  end subroutine bimu_bernoulli

 end subroutine bim3a_local_scharfetter_gummel

!-----------------------------------------------------------------------

 !> Post-compute the flux using a representation that is consistent
 !! with the discretization algorithm
 !!
 !! Given the local stabilized stiffness matrix 
 !! \f{displaymath}{ A = [a_{ij}],\ i,j = 1,\ldots, d+1 \f}
 !! and the local nodal values of the solution 
 !! \f{displaymath}{ u_j,\ j = 1,\ldots, d+1 \f}
 !! let 
 !! \f{displaymath}{ r_i = \sum_{j=1}^{d+1} a_{ij} u_j,\  
 !! i = 1,\ldots, d+1 \f}
 !!
 !! To compute the local flux vector, \f$ \mathbf{F} \f$,  
 !! we express it as a linear combination of the gradients of 
 !! the first \f$ d \f$ test functions:
 !!
 !! \f{displaymath}{
 !!   \mathbf{F} = \sum_{k=1}^d f_k \nabla \phi_k 
 !! \f}
 !! 
 !! By testing against the gradients of the first \f$ d \f$
 !! basis functions we get
 !! 
 !! \f{displaymath}{
 !!   \begin{array}{lcl}
 !!   r_i & = & \left( \mathbf{F}, \nabla \phi_i \right) \\[.3cm]
 !!       & = & \sum_{k=1}^d f_k \left( \nabla \phi_k, \nabla \phi_i \right )\\[.3cm]
 !!       & = & \sum_{k=1}^d f_k\  l_{ik}
 !!   \end{array}
 !! \qquad i = 1,\ldots, d
 !! \f}
 !! where \f$ l_{ik} \f$ denotes the elements of the local matrix for 
 !! the Laplacian operator.
 !!
 !! We use this relation to compute the fourier coefficients \f$ f_k \f$
 !! and construct \f$ \mathbf{F} \f$. Note that the principal minors 
 !! of size \f$ d \f$ of the local Laplacian matrix are SPD so they can 
 !! be inverted using a Cholesky factorization.
 

 pure subroutine bim3c_global_flux( me, shg, areak, mu, vloc, uloc, flux )

  !> master element
  type(t_me), intent(in) :: me

  !> gradients of the basis functions (d,d+1)
  real(wp), intent(in)  :: shg(:,:)

  !> element area and diffusion coefficient
  real(wp), intent(in)  :: areak, mu

  !> nodal velocity potential
  real(wp), intent(in)  :: vloc(:)

  !> nodal solution
  real(wp), intent(in) :: uloc(:)

  !> elemental value of the total flux
  real(wp), intent(out) :: flux(size(shg,1))

  integer :: i, j
  real(wp) :: lloc(size(shg,2),size(shg,2))
  real(wp) :: sloc(size(shg,2),size(shg,2))
  real(wp) :: f(size(shg,1)), r(size(shg,1))

  ! compute the usual Laplace operator
  call bim3a_local_laplacian(shg,areak,1.0_wp,lloc)
  
  ! compute the advection-diffusion operator
  call bim3a_local_scharfetter_gummel(me,shg,areak,mu,vloc,sloc)

  r = matmul (sloc, uloc)
  !call linsys (lloc(1:size(shg,1), 1:size(shg,1)), r(1:size(shg,1)), f)
  ! Cholesky factorization is more efficient
  call linsys_chol(lloc(1:size(shg,1),1:size(shg,1)),r(1:size(shg,1)),f)
  
  do i=1,size(shg,1)
     flux(i) = 0
     do j=1,size(shg,1)
        flux (i) = flux(i) + shg(i,j) * f(j)
     enddo
  enddo

 end subroutine bim3c_global_flux

!-----------------------------------------------------------------------

end module mod_scharfetter_gummel
